Getting Data from SQLite Database (10 points)

dbdriver = dbDriver('SQLite')
connect = dbConnect(dbdriver, dbname = 'vehicles.db')
d = dbGetQuery(connect, 'select * from Cars')
d = data.table(d)
d$index = NULL
head(d, 10)
##     year       make               model          VClass cylinders displ
##  1: 1985 Alfa Romeo  Spider Veloce 2000     Two Seaters         4   2.0
##  2: 1985    Ferrari          Testarossa     Two Seaters        12   4.9
##  3: 1985      Dodge             Charger Subcompact Cars         4   2.2
##  4: 1985      Dodge B150/B250 Wagon 2WD            Vans         8   5.2
##  5: 1993     Subaru    Legacy AWD Turbo    Compact Cars         4   2.2
##  6: 1993     Subaru              Loyale    Compact Cars         4   1.8
##  7: 1993     Subaru              Loyale    Compact Cars         4   1.8
##  8: 1993     Toyota             Corolla    Compact Cars         4   1.6
##  9: 1993     Toyota             Corolla    Compact Cars         4   1.6
## 10: 1993     Toyota             Corolla    Compact Cars         4   1.8
##               trany city08 highway08 comb08
##  1:    Manual 5-spd     19        25     21
##  2:    Manual 5-spd      9        14     11
##  3:    Manual 5-spd     23        33     27
##  4: Automatic 3-spd     10        12     11
##  5:    Manual 5-spd     17        23     19
##  6: Automatic 3-spd     21        24     22
##  7:    Manual 5-spd     22        29     25
##  8: Automatic 3-spd     23        26     24
##  9:    Manual 5-spd     23        31     26
## 10: Automatic 4-spd     23        30     25
summary(d)
##       year          make              model              VClass         
##  Min.   :1984   Length:35719       Length:35719       Length:35719      
##  1st Qu.:1990   Class :character   Class :character   Class :character  
##  Median :1999   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :1999                                                           
##  3rd Qu.:2008                                                           
##  Max.   :2016                                                           
##    cylinders          displ          trany               city08     
##  Min.   : 2.000   Min.   :0.600   Length:35719       Min.   : 6.00  
##  1st Qu.: 4.000   1st Qu.:2.200   Class :character   1st Qu.:15.00  
##  Median : 6.000   Median :3.000   Mode  :character   Median :17.00  
##  Mean   : 5.743   Mean   :3.328                      Mean   :17.54  
##  3rd Qu.: 6.000   3rd Qu.:4.300                      3rd Qu.:20.00  
##  Max.   :16.000   Max.   :8.400                      Max.   :53.00  
##    highway08         comb08     
##  Min.   : 9.00   Min.   : 7.00  
##  1st Qu.:20.00   1st Qu.:16.00  
##  Median :23.00   Median :19.00  
##  Mean   :23.68   Mean   :19.79  
##  3rd Qu.:27.00   3rd Qu.:22.00  
##  Max.   :61.00   Max.   :53.00

Converting to Factors (10 points)

# fastest way to convert multiple factors
convert_cols = c('make', 'VClass', 'cylinders', 'trany')
d[, (convert_cols) := lapply(.SD, factor), .SDcols = convert_cols]
##        year       make               model          VClass cylinders displ
##     1: 1985 Alfa Romeo  Spider Veloce 2000     Two Seaters         4   2.0
##     2: 1985    Ferrari          Testarossa     Two Seaters        12   4.9
##     3: 1985      Dodge             Charger Subcompact Cars         4   2.2
##     4: 1985      Dodge B150/B250 Wagon 2WD            Vans         8   5.2
##     5: 1993     Subaru    Legacy AWD Turbo    Compact Cars         4   2.2
##    ---                                                                    
## 35715: 1993     Subaru              Legacy    Compact Cars         4   2.2
## 35716: 1993     Subaru              Legacy    Compact Cars         4   2.2
## 35717: 1993     Subaru          Legacy AWD    Compact Cars         4   2.2
## 35718: 1993     Subaru          Legacy AWD    Compact Cars         4   2.2
## 35719: 1993     Subaru    Legacy AWD Turbo    Compact Cars         4   2.2
##                  trany city08 highway08 comb08
##     1:    Manual 5-spd     19        25     21
##     2:    Manual 5-spd      9        14     11
##     3:    Manual 5-spd     23        33     27
##     4: Automatic 3-spd     10        12     11
##     5:    Manual 5-spd     17        23     19
##    ---                                        
## 35715: Automatic 4-spd     19        26     22
## 35716:    Manual 5-spd     20        28     23
## 35717: Automatic 4-spd     18        24     21
## 35718:    Manual 5-spd     18        24     21
## 35719: Automatic 4-spd     16        21     18
summary(d)
##       year             make          model          
##  Min.   :1984   Chevrolet: 3635   Length:35719      
##  1st Qu.:1990   Ford     : 2958   Class :character  
##  Median :1999   Dodge    : 2465   Mode  :character  
##  Mean   :1999   GMC      : 2306                     
##  3rd Qu.:2008   Toyota   : 1821                     
##  Max.   :2016   BMW      : 1518                     
##                 (Other)  :21016                     
##                          VClass        cylinders         displ      
##  Compact Cars               : 5160   4      :13596   Min.   :0.600  
##  Subcompact Cars            : 4643   6      :12522   1st Qu.:2.200  
##  Midsize Cars               : 4035   8      : 7938   Median :3.000  
##  Standard Pickup Trucks     : 2354   5      :  759   Mean   :3.328  
##  Sport Utility Vehicle - 4WD: 2090   12     :  505   3rd Qu.:4.300  
##  Two Seaters                : 1734   3      :  195   Max.   :8.400  
##  (Other)                    :15703   (Other):  204                  
##              trany           city08        highway08         comb08     
##  Automatic 4-spd:11035   Min.   : 6.00   Min.   : 9.00   Min.   : 7.00  
##  Manual 5-spd   : 8252   1st Qu.:15.00   1st Qu.:20.00   1st Qu.:16.00  
##  Automatic 3-spd: 3151   Median :17.00   Median :23.00   Median :19.00  
##  Manual 6-spd   : 2206   Mean   :17.54   Mean   :23.68   Mean   :19.79  
##  Automatic (S6) : 2201   3rd Qu.:20.00   3rd Qu.:27.00   3rd Qu.:22.00  
##  (Other)        : 8872   Max.   :53.00   Max.   :61.00   Max.   :53.00  
##  NA's           :    2

Filter Down Data (10 points)

i = names(which(table(d$VClass) >= 40))
d = d[VClass %in% i, ]
d
##        year       make               model          VClass cylinders displ
##     1: 1985 Alfa Romeo  Spider Veloce 2000     Two Seaters         4   2.0
##     2: 1985    Ferrari          Testarossa     Two Seaters        12   4.9
##     3: 1985      Dodge             Charger Subcompact Cars         4   2.2
##     4: 1985      Dodge B150/B250 Wagon 2WD            Vans         8   5.2
##     5: 1993     Subaru    Legacy AWD Turbo    Compact Cars         4   2.2
##    ---                                                                    
## 35704: 1993     Subaru              Legacy    Compact Cars         4   2.2
## 35705: 1993     Subaru              Legacy    Compact Cars         4   2.2
## 35706: 1993     Subaru          Legacy AWD    Compact Cars         4   2.2
## 35707: 1993     Subaru          Legacy AWD    Compact Cars         4   2.2
## 35708: 1993     Subaru    Legacy AWD Turbo    Compact Cars         4   2.2
##                  trany city08 highway08 comb08
##     1:    Manual 5-spd     19        25     21
##     2:    Manual 5-spd      9        14     11
##     3:    Manual 5-spd     23        33     27
##     4: Automatic 3-spd     10        12     11
##     5:    Manual 5-spd     17        23     19
##    ---                                        
## 35704: Automatic 4-spd     19        26     22
## 35705:    Manual 5-spd     20        28     23
## 35706: Automatic 4-spd     18        24     21
## 35707:    Manual 5-spd     18        24     21
## 35708: Automatic 4-spd     16        21     18
summary(d)
##       year             make          model          
##  Min.   :1984   Chevrolet: 3633   Length:35708      
##  1st Qu.:1990   Ford     : 2958   Class :character  
##  Median :1999   Dodge    : 2465   Mode  :character  
##  Mean   :1999   GMC      : 2302                     
##  3rd Qu.:2008   Toyota   : 1821                     
##  Max.   :2016   BMW      : 1518                     
##                 (Other)  :21011                     
##                          VClass        cylinders         displ      
##  Compact Cars               : 5160   4      :13594   Min.   :0.600  
##  Subcompact Cars            : 4643   6      :12518   1st Qu.:2.200  
##  Midsize Cars               : 4035   8      : 7933   Median :3.000  
##  Standard Pickup Trucks     : 2354   5      :  759   Mean   :3.328  
##  Sport Utility Vehicle - 4WD: 2090   12     :  505   3rd Qu.:4.300  
##  Two Seaters                : 1734   3      :  195   Max.   :8.400  
##  (Other)                    :15692   (Other):  204                  
##              trany           city08        highway08         comb08     
##  Automatic 4-spd:11026   Min.   : 6.00   Min.   : 9.00   Min.   : 7.00  
##  Manual 5-spd   : 8250   1st Qu.:15.00   1st Qu.:20.00   1st Qu.:16.00  
##  Automatic 3-spd: 3151   Median :17.00   Median :23.00   Median :19.00  
##  Manual 6-spd   : 2206   Mean   :17.54   Mean   :23.68   Mean   :19.79  
##  Automatic (S6) : 2201   3rd Qu.:20.00   3rd Qu.:27.00   3rd Qu.:22.00  
##  (Other)        : 8872   Max.   :53.00   Max.   :61.00   Max.   :53.00  
##  NA's           :    2

Fuel Economy of Vehicles of Different Makes (40 points)

d = d %>% group_by(VClass, make, year) %>% summarise(MPG = mean(comb08))

for (c in levels(d$VClass)) {
    p = d[d$VClass == c,]
    if(!nrow(p)) next
    g = ggplot(p, aes(x = year, y = MPG, col = make)) +
        geom_line() +
        ggtitle(c) + 
        labs(x = 'Year', y = 'Mean combined MPG') 
    print(g)
    p = p %>% group_by(make) %>% summarize(MPG = mean(MPG))
    g = ggplot(p, aes(x = reorder(make, -MPG), y = MPG)) +
        geom_bar(alpha = 0.6, stat = 'identity') +
        ggtitle(c) + 
        labs(x = 'Make', y = 'Mean combined MPG in ALL Years') + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1))
    print(g)
}